# This file duplicates all the figures in the main file of the paper ----------
library(tidyverse)
library(cowplot) # for arranging plots
library(readxl)
library(lubridate)
library(ggpubr)
library(openintro)
library(patchwork)
#theme_set(theme_cowplot())
source("lib/fix-names.R")
#-------------------------------------------------------#
# Figure 1: Map
#-------------------------------------------------------#
load('data/city-aggregate.RData')
latlongcities <- read.csv("raw-data/2017CensusPlaceLocations.csv")

jurisdictionaidandlatlong <- right_join(latlongcities,agency,
                                        by=c("NAME"="City","USPS"="State_abbrv"))

juris.map <- jurisdictionaidandlatlong %>%
  group_by(USPS,NAME,INTPTLAT,INTPTLONG)%>%
  summarise(sumallaid=sum(sumallLESOaidBG)) %>%
  filter(sumallaid>0 & USPS!="AK")

usa <- map_data("state")

ditch_the_axes <- theme(
  axis.text = element_blank(),
  axis.line = element_blank(),
  axis.ticks = element_blank(),
  panel.border = element_blank(),
  panel.grid = element_blank(),
  axis.title = element_blank()
)

#grayscale
citiesmap <- ggplot() + geom_polygon(data = usa, aes(x=long, y = lat,group=group),color="gray",fill="NA")

#add color
citiesmap+geom_point(data = juris.map, aes(x = INTPTLONG, y = INTPTLAT,colour=sumallaid,
                                           size=sumallaid),alpha=0.4)+
  scale_colour_continuous(name = "Sum Aid",low="royalblue",high="blue4")+
  theme_bw()+ditch_the_axes+ theme(legend.position="none")

ggsave("figures/citiesmap.pdf",width=11,height=7)

rm(list=ls())
#-------------------------------------------------------#
# Figure 2: item discrepancies
#-------------------------------------------------------# 
source("lib/fix-names.R")
path <- "raw-data/DISP_AllStatesAndTerritories_03312018.xlsx"
LESOlist <- path %>%
  excel_sheets() %>%
  purrr::set_names() %>%
  map(read_excel, path = path)

LESO <- do.call(rbind,LESOlist) %>%
  rename(agency_name=`Station Name (LEA)`,state_abbrv=State)%>%
  mutate(State=abbr2state(state_abbrv),
         `Ship Date` = ymd_hms(`Ship Date`),
         year = year(`Ship Date`)) %>%
  filter(!is.na(State)) %>%
  identify_nonlocal(.)

# Read in fips data and match by agency name -------------
fips <- read_csv("raw-data/clean-countiesandfips.csv") %>%
  select(agency,agency_name,State,city,County)

mdata <- left_join(LESO,fips,
                   by = c("State"="State",
                          "agency_name"="agency_name")) %>%
  fix_county_names(.) %>%
  mutate(county = toupper(str_replace_all(County,c(" County" = "", " City and Borough" = "",
                                                   " Borough" = "",
                                                   " Parish" = "",
                                                   "De Witt" = "Dewitt",
                                                   "LaSalle" = "La salle",
                                                   "LaPorte" = "La Porte",
                                                   "DeSoto" = "De Soto"))),
         totalvalue = Quantity * `Acquisition Value`) %>%
  separate(NSN,c("FSC","country","uniqueno","uniqueno2"),remove = FALSE)%>%
  separate(FSC,into=c("federalsupplycode","itemtype"),sep = 2) %>%
  filter(year>2005 & year<2015)

# EC "NAVAJO POLICE DEPT, TUBA CTY DIST" is in military data but not counties
# they had 3 trucks
mdata$city[mdata$agency_name=="NAVAJO POLICE DEPT, TUBA CTY DIST"] <- "Tuba City"
mdata$County[mdata$agency_name=="NAVAJO POLICE DEPT, TUBA CTY DIST"] <- "Coconino County"

mdata$city[mdata$agency_name=="WI DEPT OF JUSTICE, DCI (LEA)"] <- "Madison"
mdata$County[mdata$agency_name=="WI DEPT OF JUSTICE, DCI (LEA)"] <- "Dane County"

# NPR FOIA data
npr <- read_csv("data/LESO data - all states.csv") %>%
  rename(NSN=nsn) %>%
  mutate(ship_date = mdy_hm(ship_date),
         year = year(ship_date),
         county = str_replace_all(county,c("SAINT " = "ST. ", "DE KALB" = "DEKALB",
                                           "DE SOTO" = "DESOTO", "PRINCE GEORGES" = "PRINCE GEORGE'S",
                                           "QUEEN ANNES" = "QUEEN ANNE'S", "MARYS" = "MARY'S",
                                           "SAINTE" = "STE.",
                                           "ST JOSEPH" = "ST. JOSEPH",
                                           "DESOTO" = "DE SOTO",
                                           "DE WITT" = "DEWITT",
                                           "RICHMOND CITY" = "RICHMOND")))

mdataitemcosts <- mdata %>%
  group_by(county,state_abbrv,NSN,`Ship Date`)%>%
  summarise(dailycostfoia = sum(totalvalue,na.rm=T))%>%
  rename(ship_date=`Ship Date`,state=state_abbrv)

npritemcosts <- npr %>%
  group_by(county,state,NSN,ship_date)%>%
  summarise(dailycostnpr = sum(total_cost,na.rm=T))

difference <- full_join(npritemcosts,mdataitemcosts) %>%
  mutate(dailycostfoia = if_else(is.na(dailycostfoia), 0, dailycostfoia),
         dailycostnpr = if_else(is.na(dailycostnpr), 0, dailycostnpr),
         differenceincosts = abs(dailycostnpr - dailycostfoia),
         year = year(ship_date))

exactmatches <- difference %>%
  filter(dailycostfoia==dailycostnpr) %>%
  filter(dailycostnpr>0) #10,580 obs, but only 2,617 of those are positive, about 2%
xaxis <- difference %>%
  filter(dailycostnpr==0 & dailycostfoia>0) #10,205 obs, about 7.4%
yaxis <- difference %>%
  filter(dailycostnpr>0 & dailycostfoia==0) #107,567 obs, about 77.8%

agg <- ggplot(difference,aes(x=1+dailycostfoia,y=1+dailycostnpr))+
  geom_point(alpha=0.2)+theme_bw()+ scale_x_continuous(trans = 'log10',labels = scales::dollar) +
  scale_y_continuous(trans = 'log10',labels = scales::dollar)+xlab("LESO Value")+
  ylab("NPR Value")+ theme(axis.text.x = element_text(angle = 45, hjust = 1),
                          axis.text.y = element_text(angle = 45, hjust = 1))+
  annotate("text", x = 12, y=c(5000000,18000000,70000000),
           label = c("2% Exact Matches", "77.8% on Y-Axis", "7.4% on X-Axis"),
           hjust = 0,size=6)
yearfacet <- ggplot(difference,aes(x=1+dailycostfoia,y=1+dailycostnpr))+
  geom_point(alpha=0.2)+theme_bw()+ scale_x_continuous(trans = 'log10',labels = scales::dollar) +
  scale_y_continuous(trans = 'log10',labels = scales::dollar)+xlab("LESO Value")+
  ylab("NPR Value")+ theme(axis.text.x = element_text(angle = 45, hjust = 1),
                          axis.text.y = element_text(angle = 45, hjust = 1)) +
  facet_wrap(~year)
ggarrange(agg, yearfacet)
ggsave("figures/NSNdifferencesaggandyear.pdf",width=18, height=8)

#-------------------------------------------------------#
# SI Figure 8: Item value variance in the LESO data
#-------------------------------------------------------#
nsnsd <- mdata %>%
  group_by(NSN) %>%
  dplyr::summarize(varfoia = sd(`Acquisition Value`,na.rm=T))  #2179 NSNs are only ever shipped to one county ever (56.9%); 1505 obs (39.3%) are shipped multiple times and always have same price; 142 (3.7%) are shipped multiple times and have different prices

nsnsd.nona <- nsnsd %>%
  filter(!is.na(varfoia)) #1647 observations
summary(nsnsd.nona$varfoia)

nsnsd.zero <- nsnsd %>%
  filter(varfoia==0)

nsnsd.pos <- nsnsd %>%
  filter(varfoia>0)
summary(nsnsd.pos$varfoia)

maxvar <- nsnsd %>% top_n(n=1) #1510-DS-FIX-WNGA
summary(maxvar$varfoia) #8158305

maxvarleso <- mdata %>% filter(NSN=="1510-DS-FIX-WNGA")
length(unique(maxvarleso$agency_name)) #9 agencies
summary(maxvarleso$`Acquisition Value`) #min is 9000, max is 5390000 

ggplot(nsnsd.pos, aes(x=1+varfoia)) + 
  geom_density(color='black', fill='grey') + theme_bw() +
  labs(x='Standard Deviation in Value by NSN',
       y = 'Density')   + scale_x_continuous(trans = 'log10',labels = scales::dollar) #of those with multiple items shipped (1647), 142 have different prices!

ggsave("figures/NSNdifferencesvariation.pdf")

rm(list=ls())

#-------------------------------------------------------#
# Figure 3: BG replication
#-------------------------------------------------------# 
options(scipen=999)
load('data/results-BG.RData')
source('lib/figures-functions.R')

# extract main coefficient of interest from exact BG replication ------
BG_orig <- unnest(BG_results, cols = c(`map(BG_list, tidy)`)) %>%
  filter(term == "`ltotal_cost(fit)`") %>%
  mutate(var = recode(term, "`ltotal_cost(fit)`" = "aid"),
         model = c('Total Crime Rate','Homicide', 'Robbery',
                   'Assault', 'Burglary','Larceny',"Vehicle Theft"),
         group = 'BG County')

# extract main coefficient of interest from BG county replication ------
BG_county <- unnest(BG_county_results, cols = c(`map(BG_county_list, tidy)`)) %>%
  filter(term == "`log(1 + sumallLESOaidBG_lag)(fit)`") %>%
  mutate(var = recode(term, "`log(1 + sumallLESOaidBG_lag)(fit)`" = "aid"),
         model = c('Total Crime Rate','Homicide', 'Robbery',
                   'Assault', 'Burglary','Larceny',"Vehicle Theft"),
         group = 'BG County Rep')

# extract main coefficient of interest from BG agency replication ------
BG_agency <- unnest(BG_agency_results, cols = c(agency)) %>%
  filter(term == "`log(1 + sumallLESOaidBG_lag)(fit)`") %>%
  mutate(var = recode(term, "`log(1 + sumallLESOaidBG_lag)(fit)`" = "aid"),
         model = c('Total Crime Rate','Homicide', 'Robbery',
                   'Assault', 'Burglary','Larceny',"Vehicle Theft"),
         group = 'BG Agency Rep')

## bind together
allBG <- bind_rows(BG_orig, BG_county, BG_agency) %>%
  mutate(low95 = estimate - (abs(qnorm(.025))*std.error),
         up95 = estimate + (abs(qnorm(.025))*std.error),
         low99 = estimate - (abs(qnorm(.005))*std.error),
         up99 = estimate + (abs(qnorm(.005))*std.error),
         county = recode(group, 'BG County' = 'County',
                         'BG County Rep' = 'County',
                         'BG Agency Rep' = 'Agency'))


# specify factor so groups are ordered correctly on plots
allBG$group <- factor(allBG$group, levels = c("BG County", "BG County Rep", "BG Agency Rep"))
allBG$model <- factor(allBG$model, levels = c("Total Crime Rate",
                                              "Homicide","Robbery","Assault",
                                              "Burglary","Larceny","Vehicle Theft"))


allBG$nice_names <- ifelse(allBG$group=="BG County","Bove & Gavrilova\nCounty",
                            ifelse(allBG$group=="BG County Rep","Replication\nCounty","Replication\nAgency"))

allBG$nice_names <- factor(allBG$nice_names, levels = c("Bove & Gavrilova\nCounty","Replication\nCounty","Replication\nAgency"))

# We want each facet limits to be consistent across county and agency. 
# Do each facet separately and then put together
# 1. Calculate what the limits of each facet should be so 0 is in the middle

allBG <- allBG %>% group_by(model) %>%
  mutate(max_bottom = max(abs(low99)),
         max_top = max(abs(up99)),
         lim_height = ifelse(max_bottom>max_top,max_bottom,max_top)) %>%
  select(-max_bottom, -max_top) %>%
  mutate(county = factor(county, levels = c("County","Agency")))
allBG$color_var <- allBG$county
# set up parameters
mycolors <- c("seagreen","blue")
models <- unique(allBG$model)
# 2. Plot each facet individually
p <- map2(list(allBG), models, ~plot_individual_facets_labels(data = .x, m = .y))
p2 <- map2(list(allBG), models, ~plot_individual_facets_no_labels(data = .x, m = .y))

# put plots together
all_plots <- p2[[1]] / p2[[2]] / p2[[3]] / p2[[4]] / p2[[5]] / p2[[6]] / p[[7]]

save_plot("figures/plot7by2_facets_BG.pdf",
          all_plots,
          ncol=2,base_height = 6,base_width = 4)

#Create table for figure caption
allBG$estimate <- round(allBG$estimate, 2)
allBG$up95 <- round(allBG$up95, 2)
allBG$low95 <- round(allBG$low95, 2)
allBG$p.value <- if_else(allBG$p.value>=0.001, paste("=", round(allBG$p.value,3), sep=" "), "< 0.001")

save(allBG, file = 'data/allBG_results.RData')

options(scipen=0, digits=7)
rm(list=ls())

#-------------------------------------------------------#
# Figure 4: HPBM replication
#-------------------------------------------------------# 
# elisha fix -----------
# left estimates not printing - color must be getting messed up
# pvalues also not printing
options(scipen=999)
load('data/results-HPBM.RData')
source('lib/figures-functions.R')

HPBM_items_orig <- unnest(HPBM_items_results, cols = c(`map(harris_items_list, tidy)`)) %>%
  filter(term == "`log_sum_items_lag(fit)`") %>%
  mutate(var = recode(term, "`log_sum_items_lag(fit)`" = "sum_items"),
         model = c('Homicide','Robbery','Assault',"Vehicle Theft"),
         group = 'HPBM County Items')
HPBM_value_orig <- unnest(HPBM_value_results, cols = c(`map(harris_value_list, tidy)`)) %>%
  filter(term == "`log_sum_value_ttl_lag(fit)`") %>%
  mutate(var = recode(term, "`log_sum_value_ttl_lag(fit)`" = "sum_value"),
         model = c('Homicide','Robbery','Assault',"Vehicle Theft"),
         group = 'HPBM County Value')

HPBM_items_county <- unnest(HPBM_county_items_results, cols = c(`map(county_items_list, tidy)`)) %>%
  filter(term == "`log(1 + sumallLESOquantityHARRIS_lag)(fit)`") %>%
  mutate(var = recode(term, "`log(1 + sumallLESOquantityHARRIS_lag)(fit)`" = "sum_items"),
         model = c('Homicide','Robbery','Assault',"Vehicle Theft"),
         group = 'HPBM County Rep Items')

HPBM_value_county <- unnest(HPBM_county_value_results, cols = c(`map(county_value_list, tidy)`)) %>%
  filter(term == "`log(1 + sumallLESOaidHARRIS_lag)(fit)`") %>%
  mutate(var = recode(term, "`log(1 + sumallLESOaidHARRIS_lag)(fit)`" = "sum_value"),
         model = c('Homicide','Robbery','Assault',"Vehicle Theft"),
         group = 'HPBM County Rep Value')


HPBM_items_agency <- unnest(HPBM_agency_items_results, cols = c(`map(agency_items_list, tidy)`)) %>%
  filter(term == "`log(1 + sumallLESOquantityHARRIS_lag)(fit)`") %>%
  mutate(var = recode(term, "`log(1 + sumallLESOquantityHARRIS_lag)(fit)`" = "sum_items"),
         model = c('Homicide','Robbery','Assault',"Vehicle Theft"),
         group = 'HPBM Agency Rep Items')

HPBM_value_agency <- unnest(HPBM_agency_value_results, cols = c(`map(agency_value_list, tidy)`)) %>%
  filter(term == "`log(1 + sumallLESOaidHARRIS_lag)(fit)`") %>%
  mutate(var = recode(term, "`log(1 + sumallLESOaidHARRIS_lag)(fit)`" = "sum_value"),
         model = c('Homicide','Robbery','Assault',"Vehicle Theft"),
         group = 'HPBM Agency Rep Value')

## bind together
allHPBM <- bind_rows(HPBM_items_orig, HPBM_value_orig,
                     HPBM_items_county, HPBM_value_county,
                     HPBM_items_agency, HPBM_value_agency) %>%
  mutate(low95 = estimate - (abs(qnorm(.025))*std.error),
         up95 = estimate + (abs(qnorm(.025))*std.error),
         low99 = estimate - (abs(qnorm(.005))*std.error),
         up99 = estimate + (abs(qnorm(.005))*std.error),
         county = recode(group, 'HPBM County Items' = 'County',
                         'HPBM County Value' = 'County',
                         'HPBM County Rep Items' = 'County',
                         'HPBM County Rep Value' = 'County',
                         'HPBM Agency Rep Items' = 'Agency',
                         'HPBM Agency Rep Value' = 'Agency'))


# specify factor so groups are ordered correctly on plots
allHPBM$group <- factor(allHPBM$group, levels = c("HPBM County Items",
                                                  "HPBM County Value",
                                                  "HPBM County Rep Items",
                                                  "HPBM County Rep Value",
                                                  "HPBM Agency Rep Items",
                                                  "HPBM Agency Rep Value"))
allHPBM$model <- factor(allHPBM$model, levels = c("Homicide","Robbery","Assault","Vehicle Theft"))


allHPBM <- allHPBM %>%
  mutate(nice_names = group)

allHPBM$nice_names <- recode_factor(allHPBM$nice_names,
                                    "HPBM County Items" = "HPBM County\nItems",
                                    "HPBM County Value" = "HPBM County\nValue",
                                    "HPBM County Rep Items" = "HPBM County\nRep Items",
                                    "HPBM County Rep Value" = "HPBM County\nRep Value",
                                    "HPBM Agency Rep Items" = "HPBM Agency\nRep Items",
                                    "HPBM Agency Rep Value" = "HPBM Agency\nRep Value")

# We want each facet limits to be consistent across county and agency. 
# Do each facet separately and then put together
# 1. Calculate what the limits of each facet should be so 0 is in the middle

allHPBM <- allHPBM %>% group_by(model) %>%
  mutate(max_bottom = max(abs(low99)),
         max_top = max(abs(up99)),
         lim_height = ifelse(max_bottom>max_top,max_bottom,max_top)) %>%
  select(-max_bottom, -max_top) %>%
  mutate(county = factor(county, levels = c("County","Agency")))

allHPBM$model_type <- "Items"
allHPBM$model_type[grep('value',allHPBM$group,ignore.case = T)] <- "Value"
allHPBM$color_var <- paste(allHPBM$county, allHPBM$model_type)
# set up parameters
mycolors <- c("blue","purple","darkslategray3","seagreen")
models <- unique(allHPBM$model)

# 2. Plot each facet individually
p <- map2(list(allHPBM), models, ~plot_individual_facets_labels(data = .x, m = .y))
p2 <- map2(list(allHPBM), models, ~plot_individual_facets_no_labels(data = .x, m = .y))

# put plots together
all_plots <- p2[[1]] / p2[[2]] / p2[[3]] / p[[4]]
all_plots
save_plot("figures/plot7by2_facets_harris.pdf",
          all_plots,
          ncol=2,base_height = 6,base_width = 4)

#Create table for figure caption
allHPBM$estimate <- round(allHPBM$estimate, 2)
allHPBM$up95 <- round(allHPBM$up95, 2)
allHPBM$low95 <- round(allHPBM$low95, 2)
allHPBM$p.value <- if_else(allHPBM$p.value>=0.001, paste("=", round(allHPBM$p.value,3), sep=" "), "< 0.001")

save(allHPBM, file = 'data/allHPBM_results.RData')

options(scipen=0, digits=7)
rm(list=ls())
